# coding:utf-8
import scrapy
from scrapy import FormRequest

from BashouScrapy.wenshu import common
from ..pipelines import get_wfuu_db
from ..items import WenshuSummaryItem

from bs4 import BeautifulSoup


class SummaryHeader(object):
    """Static request configuration for the GetSummary endpoint.

    Holds the endpoint URL and the HTTP headers (including a captured
    session Cookie) used for every summary request.
    """

    url = 'http://wenshu.court.gov.cn/Content/GetSummary'
    header = {
        'Host': 'wenshu.court.gov.cn',
        'Origin': 'http://wenshu.court.gov.cn',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36',
        'Accept': '*/*',
        'Accept-Language': 'zh,en-US;q=0.7,en;q=0.3',
        'Accept-Encoding': 'gzip, deflate',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'X-Requested-With': 'XMLHttpRequest',
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'max-age=0',
        'isAjax': 'true',
        'Cookie':'FSSBBIl1UgzbN7N80S=awzzJVTg3KYwTvsY4TCowMXk19Q5HbSsavKVjwX96lEH1mol7.mlqONGDjRMQIHG; FSSBBIl1UgzbN7N80T=1cOnvBdlpx7xQUV8w3HphJIrkdW9ilamaeiu9Sn6rFwLaxUALgH2.0tz2f9brRXL6bfwqN6vTvkECApz6FUukKZH7u03S7noNShtkoPU2mPBCDdTIdw8vgtUgbhUcey99MvEtvVlI5wMoL4xcCbnVhobfNzpqk5DF3tBeJF7Df1Cy3bJTBOzxxp9S9mbFrInu2iH09CAevfvsWzjTls.rcSKEVXusv9oftJhYprXSImU08SQEog9mG1ZZx4X1pGf3R0X8DbqZ3Lq5nG7eV9vpNf2bhTzlAUizbn2EdqeuT2DDGD4pl8d1Vr9oP6D43wdhBTPo9Tdgg2qf.GiXBSvDMLp9; UM_distinctid=15dee183620165-0e1498570f1808-143a6d54-1fa400-15dee18362194d; wafenterurl=L1ZhbGlDb2RlL0NyZWF0ZUNvZGU=; wzwstemplate=Nw==; ccpassport=520e9c46a0702c3db5450d0d4ff0b5b3; wzwschallenge=-1; wzwsconfirm=739218503315c599502efba9ea722463; wzwsvtime=1503048197; wafverify=fcb931f6a37d7e1fff86c86ef601a333; wafcookie=22493c39e72ac14c67d99b70b5e1f82e; __utma=61363882.1781101350.1498788079.1503045766.1503049178.10; __utmc=61363882; __utmz=61363882.1498788079.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); CNZZDATA1253471353=94959603-1502933468-null%7C1503271359; _gscu_2116842793=98717279sxoxau19; _gscs_2116842793=t0327636360io4f16|pv:3; _gscbrs_2116842793=1; Hm_lvt_3f1a54c5a86d62407544d433f6418ef5=1501808651,1502693392,1502872960,1503015603; Hm_lpvt_3f1a54c5a86d62407544d433f6418ef5=1503276371',
        # FIX: the header name was misspelled 'Refer'; the standard HTTP
        # header is 'Referer', which the site checks for AJAX requests.
        'Referer': 'http://wenshu.court.gov.cn/List/List?sorttype=1&conditions=searchWord+1+AJLX++%E6%A1%88%E4%BB%B6%E7%B1%BB%E5%9E%8B:%E5%88%91%E4%BA%8B%E6%A1%88%E4%BB%B6',
    }


class WenshuContentScrapy(scrapy.Spider):
    """Spider that fetches judgment summaries for already-crawled documents.

    Pulls documents whose ``hasSummary`` flag is still False from the
    MongoDB ``crawl_data`` collection in batches of 100, POSTs each
    document's ID to the GetSummary endpoint, and yields a
    ``WenshuSummaryItem`` carrying the extracted summary text.
    """
    name = "wenshu_summary"
    custom_settings = {
        # NOTE(review): this pipeline path is unqualified — confirm that
        # 'pipelines.WenshuSummaryPipeline' resolves at runtime; the file
        # itself imports the pipeline module as ``..pipelines``.
        'ITEM_PIPELINES': {'pipelines.WenshuSummaryPipeline': 300}
    }
    # Evaluated once at class-definition (import) time; the DB handle and
    # collection are shared by all spider instances.
    crawl_db = get_wfuu_db()
    crawl_tbl = crawl_db["crawl_data"]

    def start_requests(self):
        """Yield one summary POST request per document lacking a summary.

        Loops until no unfinished documents remain; relies on the item
        pipeline flipping ``hasSummary`` so the query eventually empties.
        """
        while True:
            # FIX: pymongo's Cursor.sort takes a key name and direction
            # (or a list of (key, direction) pairs), not a dict —
            # ``.sort({'_id': 1})`` raises an error.
            docs = self.crawl_tbl.find({'hasSummary': False}).sort('_id', 1).limit(100)
            if docs.count(with_limit_and_skip=True) == 0:
                break
            for doc in docs:
                # Refresh the anti-scraping cookies before each request.
                common.init_cookies_for_content(SummaryHeader.header)
                doc_id = doc[u"文书ID"]
                # The document ID is appended to the URL so every request
                # has a unique fingerprint and survives Scrapy's dupefilter;
                # the server reads the ID from the POST body.
                yield FormRequest(
                    url=SummaryHeader.url + str(doc_id),
                    formdata={'docId': doc_id},
                    meta={'_id': doc["_id"]},
                    headers=SummaryHeader.header,
                )

    def parse(self, response):
        """Extract the summary text from the response and yield the item.

        FIX: the original body referenced an undefined ``summary`` name
        (NameError) and read ``response.meta["PubDate"]``, a key that
        start_requests never sets (KeyError); it could not yield a valid
        item.
        """
        item = WenshuSummaryItem()
        # Strip any HTML markup from the returned snippet; fall back to a
        # sentinel when the document body is empty.
        text = BeautifulSoup(response.text, "html.parser").get_text().strip()
        if not text:
            text = u"文书内容为空"
        item["summary"] = text
        item['_id'] = response.meta["_id"]
        yield item

